"""
要求：
    1.爬取数据，前100名世界俱乐部的信息
    2.一些数据的读取
"""

import requests
import parsel
import pandas as pd

# todo 01


def Spider_Info(output_path=r'D:\Project\Exercise\2021-01-20\01.csv', pages=2):
    """Scrape the footballdatabase.com world club ranking into a CSV file.

    For every ranking page the club name, the country text and the value of
    the fourth table column (labelled "historical rank" by the original
    author) are extracted from each table row, then written to a CSV whose
    index column numbers the clubs starting at 1.

    Args:
        output_path: Destination of the CSV file. Defaults to the path the
            script has always written to.
        pages: Number of ranking pages to fetch, starting at page 1.

    Raises:
        requests.HTTPError: If a ranking page answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/'
                      '537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    }
    rows = []
    for page in range(1, pages + 1):
        url = f'https://footballdatabase.com/ranking/world/{page}'
        # `headers` must be passed by keyword: the second positional
        # parameter of requests.get() is `params` (the query string).
        response = requests.get(url, headers=headers, timeout=30)
        # Fail loudly instead of writing a CSV scraped from an error page.
        response.raise_for_status()
        html = parsel.Selector(response.text)
        for tbody in html.xpath('//table[@class="table table-hover"]/tbody'):
            # Query relative to each row so every row is visited exactly once,
            # however many rows (or tbody elements) the page contains.
            for tr in tbody.xpath('./tr'):
                name = tr.xpath('./td[2]/a/div/text()').get()
                if name is None:
                    # Row without a club name cell: not a ranking entry.
                    continue
                country = tr.xpath('./td[2]/a/text()').get()
                last_number = tr.xpath('./td[4]/text()').get()
                rows.append((name, country, last_number))
    # Build the frame from plain rows and number them 1..N. Wrapping the
    # columns in pd.Series (labels 0..N-1) and then passing a 1-based index
    # would reindex the data and silently drop the first scraped record.
    # The 'contry' column name is kept as-is for existing consumers of the CSV.
    df = pd.DataFrame(rows, columns=['name', 'contry', 'last_number'],
                      index=pd.RangeIndex(1, len(rows) + 1))
    df.to_csv(output_path, sep=',', index=True, header=True)


# Spider_Info()
# NOTE: reading this CSV once failed with
#   UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb5 in position 0: invalid start
# DataFrame.to_csv writes UTF-8 by default, so UTF-8 is tried first; this keeps
# non-ASCII club names intact. The 'unicode_escape' codec (which decodes bytes
# as Latin-1 and interprets backslash escapes) is kept only as a fallback so a
# file that is not valid UTF-8 still loads the way it did before.
_CSV_PATH = r'D:\Project\Exercise\2021-01-20\01.csv'
try:
    df = pd.read_csv(_CSV_PATH, sep=',', encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(_CSV_PATH, sep=',', encoding='unicode_escape')
print(df)
